setwd("C:\\Users\\authorized user\\Dropbox\\STAT445\\week12\\tutorials") (tbl=read.table(file="Husb_wifes.txt",sep="",header=T)) # tbl=matrix(c(24,12,16,48,60,40),byrow=T,nrow=3) # rownames(tbl)=c("A1","A2","A3") # colnames(tbl)=c("B1","B2") ############################################################################### #obtain coordinates of correspondence analysis following the #example 12.8 in the texbook page 722 #This is just to practice the correspondence analysis #The analysis is performed in JMP ############################################################################### ncol(tbl) nrow(tbl) rownames(tbl)=tbl[,1] colnames(tbl)[2:4] mat=as.matrix(tbl[,2:4]) #marginal totals (c=colSums(mat)/sum(mat)) (r=rowSums(mat)/sum(mat)) #correspondence matrix P=mat/sum(mat) #negative square root matrices: Dr=diag(1/sqrt(r)) Dc=diag(1/sqrt(c)) #r %*% t(c) is rank approximation for the correspondence matrix Q=P-r %*% t(c) #Scaled version of the A=Dr %*% Q %*% Dc #singular value decomposition of A #equals to eigen value of AAt and AtA AtA=t(A) %*% (A) AAt=A %*% t(A) #perform singular decomposition (i.e obtain egen value- vector of At(A)) eigAtA=eigen(AtA) eigAAt=eigen(AAt) #coordinates for the correspondence analysis plot #coordinates for Husbands coorH1=sqrt(eigAAt$value[1])*(solve(Dr)%*% (eigAAt$vectors)[,1]) coorH2=sqrt(eigAAt$value[2])*(solve(Dr)%*% (eigAAt$vectors)[,2]) ch=cbind(coorH1,coorH2) rownames(ch)=rownames(tbl) #coordinates for wifes coorW1=sqrt(eigAtA$value[1])*(solve(Dc)%*%(eigAtA$vectors)[,1]) coorW2=sqrt(eigAtA$value[2])*(solve(Dc)%*%(eigAtA$vectors)[,2]) cw=cbind(coorW1,coorW2) rownames(cw)=colnames(tbl)[2:4] #check the best rank K approximation to the matrix Q (sqrt(eigAAt$value[1])*(solve(Dr)%*% eigAAt$vectors[,1]) %*% t(solve(Dc)%*% eigAtA$vectors[,1])+ sqrt(eigAAt$value[2])*(solve(Dr)%*% eigAAt$vectors[,2]) %*% t(solve(Dc)%*% eigAtA$vectors[,2])+ sqrt(eigAAt$value[3])*(solve(Dr)%*% eigAAt$vectors[,3]) %*% t(solve(Dc)%*% eigAtA$vectors[,3])) caption=c("HT","HM","HS" ,"WT","WM","WS") #note that png will create a .png file with the plot in the folder where #you hvae set up your working directory png("correspondence-Manually.png") par(mar=c(5.1, 4.1, 4.1, 14.1), xpd=TRUE) plot(c(coorH2,coorW2),c(coorH1,coorW1),xlab="c1",ylab="c2", main="Corresponedence analysis plot",col=c(rep("blue",3),rep("red",3)),pch=c(rep(16,3),rep(18,3))) text(c(coorH2,coorW2),c(coorH1,coorW1),labels=caption,col=c(rep("blue",3),rep("red",3))) legend("topright", inset=c(-0.70,0), c("HT- Husband Tall", "HM- Husband Medium","HS-Husband Short", "WT- Wife Tall","WM-Wife Medium","WS-Wife Short"), pch=c(rep(16,3),rep(18,3)),col=c(rep("blue",3),rep("red",3)) ) dev.off() ################################################################################# #results a quite different from those obtained from JMP ################################################################################# ################################################################################# #END of the correspondence analysis done step by step in R ################################################################################# ################################################################################# #Now create correspondence analysis plot from the JMP output #for the correspondence analysis #Download JMP from this site: #https://www.sfu.ca/itservices/technical/software.html #IN JMP project file (Husb_wifes_proj.jmpprj): #try to run correspondence analysis on #the table: stacked husb_wifes_erroneous.jmp #see what happens when you introduce an #data entry error ################################################################################# h=read.csv(file="husbands.csv",header=T) w=read.csv(file="wifes.csv",header=T) coor1=c(h$c1,w$c1) coor2=c(h$c2,w$c2) png("correspondence-JMP.png") caption=c("HM","HS","HT" ,"WM","WS","WT") #set up the margins of the plot par(mar=c(5.1, 4.1, 4.1, 14.1), xpd=TRUE) plot(coor1,coor2,xlab="c1",ylab="c2",main="Corresponedence analysis plot", col=c(rep("blue",3),rep("red",3)),pch=c(rep(16,3),rep(18,3))) text(coor1,coor2,labels=caption,col=c(rep("blue",3),rep("red",3))) legend("topright", inset=c(-0.70,0), c("HT- Husband Tall", "HM- Husband Medium","HS-Husband Short", "WT- Wife Tall","WM-Wife Medium","WS-Wife Short"), pch=c(rep(16,3),rep(18,3)),col=c(rep("blue",3),rep("red",3)) ) dev.off() #################################################################################